# Load the built-in iris data set and show its first six rows.
data("iris")
head(iris, n = 6L)
## Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## 1 5.1 3.5 1.4 0.2 setosa
## 2 4.9 3.0 1.4 0.2 setosa
## 3 4.7 3.2 1.3 0.2 setosa
## 4 4.6 3.1 1.5 0.2 setosa
## 5 5.0 3.6 1.4 0.2 setosa
## 6 5.4 3.9 1.7 0.4 setosa
# Explicitly print the same six-row preview of iris.
print(utils::head(iris, n = 6L))
## Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## 1 5.1 3.5 1.4 0.2 setosa
## 2 4.9 3.0 1.4 0.2 setosa
## 3 4.7 3.2 1.3 0.2 setosa
## 4 4.6 3.1 1.5 0.2 setosa
## 5 5.0 3.6 1.4 0.2 setosa
## 6 5.4 3.9 1.7 0.4 setosa
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(rio)
## Warning: package 'rio' was built under R version 4.2.1
library(ggplot2)
library(plotly)
## Warning: package 'plotly' was built under R version 4.2.1
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:rio':
##
## export
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
# Read the salary data from Salaries.csv (via rio) and preview the first rows.
data <- rio::import("Salaries.csv")
head(data, n = 6L)
## rank discipline yrs.since.phd yrs.service sex salary
## 1 Prof B 19 18 Male 139750
## 2 Prof B 20 16 Male 173200
## 3 AsstProf B 4 3 Male 79750
## 4 Prof B 45 39 Male 115000
## 5 Prof B 40 41 Male 141500
## 6 AssocProf B 6 6 Male 97000
# Show the last six rows of the salary data.
tail(data, n = 6L)
## rank discipline yrs.since.phd yrs.service sex salary
## 392 Prof A 30 19 Male 151292
## 393 Prof A 33 30 Male 103106
## 394 Prof A 31 19 Male 150564
## 395 Prof A 42 25 Male 101738
## 396 Prof A 25 15 Male 95329
## 397 AsstProf A 8 4 Male 81035
# Number of rows and columns in the salary data.
c(nrow(data), ncol(data))
## [1] 397 6
# Column names of the salary data.
colnames(data)
## [1] "rank" "discipline" "yrs.since.phd" "yrs.service"
## [5] "sex" "salary"
# Excel export is left disabled. If it is re-enabled, qualify the call:
# plotly is attached after rio and masks export().
# rio::export(data, "data.xlsx")
# Write the salary data to a text file in the working directory.
write.table(data, file = "data.txt")
# Open the help page for sd().
help("sd")
## starting httpd help server ...
## done
# Display the starwars tibble (provided by dplyr).
dplyr::starwars
## # A tibble: 87 × 14
## name height mass hair_…¹ skin_…² eye_c…³ birth…⁴ sex gender homew…⁵
## <chr> <int> <dbl> <chr> <chr> <chr> <dbl> <chr> <chr> <chr>
## 1 Luke Skywa… 172 77 blond fair blue 19 male mascu… Tatooi…
## 2 C-3PO 167 75 <NA> gold yellow 112 none mascu… Tatooi…
## 3 R2-D2 96 32 <NA> white,… red 33 none mascu… Naboo
## 4 Darth Vader 202 136 none white yellow 41.9 male mascu… Tatooi…
## 5 Leia Organa 150 49 brown light brown 19 fema… femin… Aldera…
## 6 Owen Lars 178 120 brown,… light blue 52 male mascu… Tatooi…
## 7 Beru White… 165 75 brown light blue 47 fema… femin… Tatooi…
## 8 R5-D4 97 32 <NA> white,… red NA none mascu… Tatooi…
## 9 Biggs Dark… 183 84 black light brown 24 male mascu… Tatooi…
## 10 Obi-Wan Ke… 182 77 auburn… fair blue-g… 57 male mascu… Stewjon
## # … with 77 more rows, 4 more variables: species <chr>, films <list>,
## # vehicles <list>, starships <list>, and abbreviated variable names
## # ¹hair_color, ²skin_color, ³eye_color, ⁴birth_year, ⁵homeworld
## # ℹ Use `print(n = ...)` to see more rows, and `colnames()` to see all variable names
# List the data sets that are currently available.
utils::data()
library(dplyr)
# Keep only the name, height and gender columns of starwars.
newdata <- starwars %>%
  select(name, height, gender)
newdata
## # A tibble: 87 × 3
## name height gender
## <chr> <int> <chr>
## 1 Luke Skywalker 172 masculine
## 2 C-3PO 167 masculine
## 3 R2-D2 96 masculine
## 4 Darth Vader 202 masculine
## 5 Leia Organa 150 feminine
## 6 Owen Lars 178 masculine
## 7 Beru Whitesun lars 165 feminine
## 8 R5-D4 97 masculine
## 9 Biggs Darklighter 183 masculine
## 10 Obi-Wan Kenobi 182 masculine
## # … with 77 more rows
## # ℹ Use `print(n = ...)` to see more rows
# Keep only the female characters whose homeworld is Ojom.
# The "female" label lives in the `sex` column; `gender` is coded as
# "masculine"/"feminine", so comparing `gender` to "female" can never match.
# NOTE(review): the result can still be empty if the data holds no female
# character from Ojom.
newdata <- filter(
  starwars,
  sex == "female" & homeworld == "Ojom"
)
newdata
## # A tibble: 0 × 14
## # … with 14 variables: name <chr>, height <int>, mass <dbl>, hair_color <chr>,
## # skin_color <chr>, eye_color <chr>, birth_year <dbl>, sex <chr>,
## # gender <chr>, homeworld <chr>, species <chr>, films <list>,
## # vehicles <list>, starships <list>
## # ℹ Use `colnames()` to see all variable names
# Keep the characters whose homeworld is Alderaan, Coruscant or Endor.
newdata <- starwars %>%
  filter(homeworld %in% c("Alderaan", "Coruscant", "Endor"))
newdata
## # A tibble: 7 × 14
## name height mass hair_…¹ skin_…² eye_c…³ birth…⁴ sex gender homew…⁵
## <chr> <int> <dbl> <chr> <chr> <chr> <dbl> <chr> <chr> <chr>
## 1 Leia Organa 150 49 brown light brown 19 fema… femin… Aldera…
## 2 Wicket Syst… 88 20 brown brown brown 8 male mascu… Endor
## 3 Finis Valor… 170 NA blond fair blue 91 male mascu… Corusc…
## 4 Adi Gallia 184 50 none dark blue NA fema… femin… Corusc…
## 5 Bail Presto… 191 NA black tan brown 67 male mascu… Aldera…
## 6 Jocasta Nu 167 NA white fair blue NA fema… femin… Corusc…
## 7 Raymus Anti… 188 79 brown light brown NA male mascu… Aldera…
## # … with 4 more variables: species <chr>, films <list>, vehicles <list>,
## # starships <list>, and abbreviated variable names ¹hair_color, ²skin_color,
## # ³eye_color, ⁴birth_year, ⁵homeworld
## # ℹ Use `colnames()` to see all variable names
# Blank out implausible heights: values below 75 or above 200 become NA,
# everything else is kept. Heights that are already NA stay NA.
newdata <- starwars %>%
  mutate(
    height = if_else(height < 75 | height > 200, NA_integer_, height)
  )
newdata
## # A tibble: 87 × 14
## name height mass hair_…¹ skin_…² eye_c…³ birth…⁴ sex gender homew…⁵
## <chr> <int> <dbl> <chr> <chr> <chr> <dbl> <chr> <chr> <chr>
## 1 Luke Skywa… 172 77 blond fair blue 19 male mascu… Tatooi…
## 2 C-3PO 167 75 <NA> gold yellow 112 none mascu… Tatooi…
## 3 R2-D2 96 32 <NA> white,… red 33 none mascu… Naboo
## 4 Darth Vader NA 136 none white yellow 41.9 male mascu… Tatooi…
## 5 Leia Organa 150 49 brown light brown 19 fema… femin… Aldera…
## 6 Owen Lars 178 120 brown,… light blue 52 male mascu… Tatooi…
## 7 Beru White… 165 75 brown light blue 47 fema… femin… Tatooi…
## 8 R5-D4 97 32 <NA> white,… red NA none mascu… Tatooi…
## 9 Biggs Dark… 183 84 black light brown 24 male mascu… Tatooi…
## 10 Obi-Wan Ke… 182 77 auburn… fair blue-g… 57 male mascu… Stewjon
## # … with 77 more rows, 4 more variables: species <chr>, films <list>,
## # vehicles <list>, starships <list>, and abbreviated variable names
## # ¹hair_color, ²skin_color, ³eye_color, ⁴birth_year, ⁵homeworld
## # ℹ Use `print(n = ...)` to see more rows, and `colnames()` to see all variable names
# Load msleep from ggplot2.
data("msleep", package = "ggplot2")
# Proportion of missing values in each column: count the NA cells per column
# and divide by the number of rows.
pctmiss <- local({
  missing_flags <- is.na(msleep)
  colSums(missing_flags) / nrow(missing_flags)
})
round(pctmiss, digits = 2)
## name genus vore order conservation sleep_total
## 0.00 0.00 0.08 0.00 0.35 0.00
## sleep_rem sleep_cycle awake brainwt bodywt
## 0.27 0.61 0.00 0.33 0.00
library(ggplot2)
library(mosaicData)
## Warning: package 'mosaicData' was built under R version 4.2.1
# Load the Marriage data and draw a bar chart of the number of records per
# race category, coloured with palette entries 1 to 4.
data("Marriage")
barplot(height = table(Marriage[["race"]]), col = 1:4)

# Scatter plot of highway mileage against engine displacement for the mpg
# data, coloured by car class, then shown as an interactive plotly figure.
p <- ggplot(data = mpg, mapping = aes(x = displ, y = hwy, color = class)) +
  geom_point(size = 3) +
  labs(
    x = "Engine displacement",
    y = "Highway Mileage",
    color = "Car Class"
  ) +
  theme_bw()
ggplotly(p)